library(tidyverse)
library(sf)
library(viridis)
library(lubridate)
library(tsibble)
library(fable)
library(fabletools)
library(feasts)
library(janitor)
library(rnaturalearth)
library(rnaturalearthdata)
library(ggridges)
library(knitr)
library(forecast)
# Load the logistics dataset, standardise the coordinate/time columns, and
# keep only rows with a usable GPS fix and a parseable timestamp.
data <- read_csv("dynamic_supply_chain_logistics_dataset.csv", show_col_types = FALSE) %>%
  rename(lon = vehicle_gps_longitude, lat = vehicle_gps_latitude) %>%
  mutate(
    lon = suppressWarnings(as.numeric(lon)),
    lat = suppressWarnings(as.numeric(lat)),
    timestamp = lubridate::ymd_hms(timestamp, quiet = TRUE),
    # Early deliveries (negative deviation) are floored at zero hours.
    delay_hours = pmax(suppressWarnings(as.numeric(delivery_time_deviation)), 0)
  ) %>%
  filter(!is.na(lon), !is.na(lat), !is.na(timestamp))
Forecast Reconciliation for SCM
1 Libraries and Loading Data
2 Visualisations
We provide delay probabilities overlaid on the US map, and a subset of it on the SoCal region.
# Base map: contiguous-US state polygons in WGS84.
us <- ne_states(country = "United States of America", returnclass = "sf")
us_conus <- subset(us, !name %in% c("Alaska", "Hawaii", "Puerto Rico")) |>
  st_transform(4326)

# Promote records to point geometries; lon/lat columns are kept for plotting.
pts <- st_as_sf(data, coords = c("lon", "lat"), crs = 4326, remove = FALSE)

# Clip to points that fall inside the CONUS polygons.
inside <- lengths(st_within(pts, us_conus)) > 0
pts_in <- pts[inside, , drop = FALSE]

# Thin to at most 80k points so the scatter stays legible.
set.seed(1)
pts_plot <- if (nrow(pts_in) > 80000) {
  pts_in[sample(nrow(pts_in), 80000), ]
} else {
  pts_in
}

us_outline <- st_cast(us_conus, "MULTILINESTRING")

# Colour by delay probability when the column exists; otherwise plain blue.
color_var <- "delay_probability"
has_color <- color_var %in% names(pts_plot)

conus_bbox <- st_bbox(us_conus)
p <- ggplot() +
  geom_sf(data = us_conus, fill = "grey98", color = "grey90", linewidth = 0.2) +
  {
    if (has_color) {
      geom_point(data = st_drop_geometry(pts_plot),
                 aes(x = lon, y = lat, color = .data[[color_var]]),
                 alpha = 0.35, size = 0.25)
    } else {
      geom_point(data = st_drop_geometry(pts_plot),
                 aes(x = lon, y = lat),
                 color = "steelblue", alpha = 0.35, size = 0.25)
    }
  } +
  geom_sf(data = us_outline, fill = NA, color = "grey20", linewidth = 0.3) +
  coord_sf(xlim = conus_bbox[c("xmin", "xmax")],
           ylim = conus_bbox[c("ymin", "ymax")],
           expand = FALSE, clip = "on") +
  # NULL from the unmatched if is silently ignored by ggplot's `+`.
  { if (has_color) scale_color_viridis_c(name = color_var, limits = c(0, 1)) } +
  labs(title = "Delay Probabilities: All States",
       x = "Longitude", y = "Latitude") +
  theme_minimal(base_size = 12) +
  theme(panel.grid = element_blank())
print(p)
2.1 Southern California
# California polygon (WGS84) for the Southern California close-up.
ca <- rnaturalearth::ne_states(country = "United States of America",
                               returnclass = "sf") |>
  subset(name == "California") |>
  st_transform(4326)

# All records as sf points, then restricted to California.
pts_all <- st_as_sf(data, coords = c("lon", "lat"), crs = 4326, remove = FALSE)
pts_ca <- st_intersection(pts_all, ca)

# Crop to a SoCal bounding box (st_bbox expects xmin, ymin, xmax, ymax).
socal_bbox <- c(xmin = -121, ymin = 32, xmax = -114, ymax = 36)
bbox_poly <- st_as_sfc(st_bbox(socal_bbox, crs = sf::st_crs(4326)))
pts_socal <- st_intersection(pts_ca, bbox_poly)

cat("CA points:", nrow(pts_ca), " | SoCal-in-CA points:", nrow(pts_socal), "\n")
CA points: 613 | SoCal-in-CA points: 408
# SoCal bubble map: both colour and size encode the delay probability.
ca_outline <- sf::st_cast(ca, "MULTILINESTRING")
p_socal <- ggplot() +
  geom_sf(data = ca, fill = "grey98", color = "grey85", linewidth = 0.2) +
  # Bubble layer
  geom_sf(
    data = pts_socal,
    aes(color = delay_probability, size = delay_probability),
    alpha = 0.55
  ) +
  geom_sf(data = ca_outline, fill = NA, color = "grey20", linewidth = 0.4) +
  coord_sf(xlim = c(socal_bbox["xmin"], socal_bbox["xmax"]),
           ylim = c(socal_bbox["ymin"], socal_bbox["ymax"]),
           expand = FALSE, clip = "on") +
  scale_color_viridis_c(name = "delay_probability", limits = c(0, 1)) +
  # Size legend suppressed; it duplicates the colour scale.
  scale_size_continuous(range = c(2, 5), name = "delay_probability", guide = "none") +
  labs(title = "Delay Probabilities: Southern California",
       x = "Longitude", y = "Latitude") +
  theme_minimal(base_size = 12) +
  theme(panel.grid = element_blank())
print(p_socal)
3 Variables
For reconciliation, we should use an additive target variable. Thus, we define a new target based on target D (Delivery Time Deviation) as total delay time, given as \[T=\sum \max\{D_i, 0\},\quad D_i=\text{delivery time deviation}.\] For this reason, we add the delay_time variable.
data = data %>% mutate(delay_time = pmax(delivery_time_deviation, 0))We also add a State variable for the two-level hierarchy on State/cell_id.
# 1) Make fields numeric + additive ingredient (row-level).
#    NOTE: this repeats the load-time cleaning, so it is idempotent here.
data <- data %>%
  mutate(
    lon = suppressWarnings(as.numeric(lon)),
    lat = suppressWarnings(as.numeric(lat)),
    timestamp = ymd_hms(timestamp, quiet = TRUE),
    # suppressWarnings restored for consistency with the initial load:
    # non-numeric deviations become NA without emitting coercion warnings.
    delay_hours = pmax(suppressWarnings(as.numeric(delivery_time_deviation)), 0)
  ) %>%
  filter(!is.na(lon), !is.na(lat), !is.na(timestamp))

# 2) Attach US state (for two-level hierarchy: TOTAL -> state -> cell_id).
us <- ne_states(country = "United States of America", returnclass = "sf")
us_conus <- subset(us, !name %in% c("Alaska", "Hawaii", "Puerto Rico")) %>%
  st_transform(4326)
pts <- st_as_sf(data, coords = c("lon", "lat"), crs = 4326, remove = FALSE)
# Spatial left join tags each point with its state name, then drop geometry.
data <- st_join(pts, us_conus["name"], left = TRUE) %>% st_drop_geometry()
names(data)[names(data) == "name"] <- "state"
# Points outside CONUS get NA state and are removed.
data <- filter(data, !is.na(state))
4 Forecast
We first generate the hierarchical time series data associated with data. We group the data by months as well to get better totals; we tried hourly (default) and daily but don’t get any good results.
# Month key and a binary cargo-condition label (<= 0.5 => "Poor").
data <- data %>%
  mutate(
    Month = yearmonth(timestamp),
    cargo = if_else(as.numeric(cargo_condition_status) <= 0.5, "Poor", "Good")
  ) %>%
  filter(!is.na(state))

# One row per Month x state x cargo: monthly totals of delay hours.
monthly <- data %>%
  group_by(Month, state, cargo) %>%
  summarise(total_delay_hours = sum(delay_hours, na.rm = TRUE), .groups = "drop")

# Complete panel: fill calendar gaps with zeros, then build the
# state/cargo aggregation structure needed for reconciliation.
ts_m <- monthly %>%
  as_tsibble(index = Month, key = c(state, cargo)) %>%
  fill_gaps(.full = TRUE) %>%
  mutate(total_delay_hours = coalesce(total_delay_hours, 0)) %>%
  aggregate_key(state / cargo, total_delay_hours = sum(total_delay_hours))
4.1 Train-Test Split
# Hold out the final 12 months for evaluation.
n_test <- 12
cutoff <- max(ts_m$Month, na.rm = TRUE) - n_test
train_data <- ts_m %>% filter(Month <= cutoff)
test_data <- ts_m %>% filter(Month > cutoff)
When we try to fit an ETS model, we generally get NULL models for a lot of these entries, so we counteract this by doing the specified ETS model for the non-NULL models, and set the rest as a TSLM model.
ETS (additive) captures level/trend/season for non-negative flows and adapts well when there’s actual signal.
TSLM(~1) (intercept-only) is a safe fallback for all-zero or ultra-sparse series; it yields a valid (typically zero) forecast instead of a NULL model, keeping the hierarchy intact for reconciliation.
# Base fit: additive-error, additive-season ETS on every series.
fit <- train_data %>% model(ETS = ETS(total_delay_hours ~ error("A") + season("A")))
# Fallback mable: intercept-only TSLM fitted to every series.
fb <- train_data %>% model(ETS = TSLM(total_delay_hours ~ 1))
# Swap the fallback in wherever ETS failed, keeping a single model column.
idx <- is_null_model(fit$ETS)
fit$ETS[idx] <- fb$ETS[idx]
4.2 Reconciliation
# Reconcile the base forecasts three ways, then forecast the test horizon.
recon_fc <- fit %>%
  reconcile(
    BU = bottom_up(ETS),                # bottom-up aggregation
    OLS = min_trace(ETS, "ols"),        # MinT with identity weights
    WLS = min_trace(ETS, "wls_struct")  # MinT with structural weights
  ) %>%
  forecast(h = n_test)
4.2.1 Diagnostics
# Coherence check 1: TOTAL vs the sum over states, per month and method.
rt <- recon_fc %>%
  as_tibble() %>%
  filter(.model %in% c("BU", "OLS", "WLS"))

totals <- rt %>%
  filter(is_aggregated(state), is_aggregated(cargo)) %>%
  select(.model, Month, total = .mean)
sum_states <- rt %>%
  filter(!is_aggregated(state), is_aggregated(cargo)) %>%
  group_by(.model, Month) %>%
  summarise(sum_states = sum(.mean, na.rm = TRUE), .groups = "drop")

# Max absolute discrepancy per method should be zero for coherent forecasts.
left_join(totals, sum_states, by = c(".model", "Month")) %>%
  mutate(diff = round(total - sum_states, 10)) %>%
  group_by(.model) %>%
  summarise(max_abs_diff = max(abs(diff), na.rm = TRUE), .groups = "drop")
# A tibble: 3 × 2
.model max_abs_diff
<chr> <dbl>
1 BU 0
2 OLS 0
3 WLS 0
# Coherence check 2: each state's total vs the sum of its cargo children.
parents <- rt %>%
  filter(!is_aggregated(state), is_aggregated(cargo)) %>%
  select(.model, Month, state, parent = .mean)
children <- rt %>%
  filter(!is_aggregated(state), !is_aggregated(cargo)) %>%
  group_by(.model, Month, state) %>%
  summarise(sum_children = sum(.mean, na.rm = TRUE), .groups = "drop")

# Again, a zero max-abs-diff per method confirms the hierarchy adds up.
left_join(parents, children, by = c(".model", "Month", "state")) %>%
  mutate(diff = round(parent - sum_children, 10)) %>%
  group_by(.model) %>%
  summarise(max_abs_diff = max(abs(diff), na.rm = TRUE), .groups = "drop")
# A tibble: 3 × 2
.model max_abs_diff
<chr> <dbl>
1 BU 0
2 OLS 0
3 WLS 0
5 Comparison
# Average MAE/RMSE across every series per method; best RMSE first.
acc <- accuracy(recon_fc, test_data)
res <- acc %>%
  group_by(.model) %>%
  summarise(MAE = mean(MAE, na.rm = TRUE),
            RMSE = mean(RMSE, na.rm = TRUE)) %>%
  arrange(RMSE)
kable(res, align = "ccc")
| .model | MAE | RMSE |
|---|---|---|
| OLS | 13.67204 | 17.13070 |
| WLS | 13.72854 | 17.25368 |
| ETS | 13.86745 | 17.37866 |
| BU | 13.99175 | 17.61708 |
It seems that OLS is the best-performing model.
5.1 Plot
# History: California state total (cargo aggregated).
ca_hist <- ts_m %>%
  filter(state == "California", is_aggregated(cargo))
# Reconciled forecasts for the same series.
recon_ca <- recon_fc %>%
  filter(state == "California", is_aggregated(cargo))

# Forecast fans suppressed (level = NULL); history drawn faintly behind.
autoplot(recon_ca, level = NULL) +
  autolayer(ca_hist, total_delay_hours, alpha = 0.3) +
  labs(
    title = "Reconciled forecasts by method – California (state total)",
    y = "Total delay-hours (monthly)"
  ) +
  theme_bw()
Overall seems like even with reconciliation, we don’t get a more accurate model. This could be attributed to the input variable not being very “significant” in difference.
5.2 Post-Prediction EDA: Delay Time vs Cargo Condition Status
# Re-derive the cargo label (idempotent; already computed earlier) and
# compare delay_time across the two cargo-condition groups.
data <- data %>%
  mutate(cargo = if_else(as.numeric(cargo_condition_status) <= 0.5, "Poor", "Good"))

ggplot(data, aes(x = cargo, y = delay_time, fill = cargo)) +
  geom_boxplot(outlier.alpha = 0.15, width = 0.65) +
  labs(title = "Delay time by cargo condition",
       x = NULL, y = "Delay time (hours)") +
  theme_bw()

ggplot(data, aes(x = delay_time, y = cargo, fill = cargo)) +
  geom_density_ridges(alpha = 0.8, scale = 1.1, rel_min_height = 0.001, color = "white") +
  labs(title = "Distribution of delay time by cargo condition",
       x = "Delay time (hours)", y = NULL) +
  theme_bw()
So really not much of a difference overall; this is not a very relevant feature at least.
6 ARIMA
We try a simple ARIMA model as a baseline comparison.
# Monthly California delay totals for a univariate ARIMA baseline.
monthly <- data %>%
  filter(state == "California") %>%
  mutate(Month = yearmonth(timestamp)) %>%
  group_by(Month) %>%
  summarise(y = sum(delay_time, na.rm = TRUE), .groups = "drop") %>%
  arrange(Month)

# Complete the month index and zero-fill any gaps.
monthly_full <- monthly %>%
  as_tsibble(index = Month) %>%
  fill_gaps() %>%
  mutate(y = coalesce(y, 0)) %>%
  as_tibble()

# Convert to a base-R ts (freq = 12) so it prints in Jan..Dec matrix form.
start_y <- year(as_date(min(monthly_full$Month)))
start_m <- month(as_date(min(monthly_full$Month)))
y_ts <- ts(monthly_full$y, start = c(start_y, start_m), frequency = 12)
y_ts
 Jan Feb Mar Apr May Jun Jul
2021 41.26490 33.66997 128.58444 46.87716 98.83191 123.12791 111.75226
2022 75.56938 46.94707 99.82337 118.99028 116.92938 77.03781 53.17551
2023 89.84912 70.53951 63.66198 60.81771 126.03886 67.91650 61.50690
2024 36.24753 48.74078 95.24920 41.87473 120.24339 84.70594 81.51764
Aug Sep Oct Nov Dec
2021 55.71658 92.69303 82.79664 57.03349 55.54021
2022 53.52959 27.81023 32.14640 101.23152 98.54555
2023 96.46829 75.34859 60.78409 119.15343 42.49698
2024 105.28985
ARIMA/SARIMA is appropriate here because the monthly delay-hours series is non-stationary (level shifts, uneven mean) and appears to have a seasonal pattern; ARIMA handles this by applying regular and seasonal differencing (\(d\), \(D\)) before fitting AR/MA dynamics. Using auto.arima will automatically choose the needed differencing and seasonal terms, with residual checks confirming stationarity after differencing.
6.1 Fitting and Residuals
# Calendar split: train on 2021–2023, test on 2024.
freq <- frequency(y_ts)
ti <- time(y_ts)
# Index positions by calendar year.
idx_train <- which(floor(ti) < 2024)   # 2021–2023
idx_test <- which(floor(ti) == 2024)   # 2024 only
# Rebuild contiguous ts objects from the index subsets.
y_train <- ts(y_ts[idx_train], start = start(y_ts), frequency = freq)
y_test <- ts(y_ts[idx_test], start = c(2024, 1), frequency = freq)
# Quick sanity checks on the split.
c(length_train = length(y_train), length_test = length(y_test))
length_train length_test
36 8
start(y_train); end(y_train)
[1] 2021 1
[1] 2023 12
start(y_test); end(y_test)
[1] 2024 1
[1] 2024 8
# Exhaustive search (no stepwise shortcut, exact likelihood evaluation).
fit <- auto.arima(y_train, stepwise = FALSE, approximation = FALSE, stationary = FALSE)
checkresiduals(fit)
Ljung-Box test
data: Residuals from ARIMA(0,0,0) with non-zero mean
Q* = 10.512, df = 7, p-value = 0.1614
Model df: 0. Total lags used: 7
6.2 Interpretation
We find that the algorithm selected ARIMA(0, 0, 0), meaning there really is no useful prediction method without incorporating other variables. For one final check we will try using the \(x\)-regressor as cargo_condition_status.
7 With \(x\)-regressor
# Build monthly target + raw regressor (mean cargo condition per month).
monthly_x <- data %>%
  mutate(Month = yearmonth(timestamp),
         ccs = as.numeric(cargo_condition_status)) %>%
  group_by(Month) %>%
  summarise(
    y = sum(delay_time, na.rm = TRUE),   # target
    ccs_mean = mean(ccs, na.rm = TRUE),  # raw regressor (0..1)
    .groups = "drop"
  ) %>%
  arrange(Month) %>%
  # Complete the month index; zero-fill y. Months introduced by fill_gaps()
  # carry NA for ccs_mean, which fill()/coalesce() below repairs.
  # (The original `replace_na(ccs_mean, NA_real_)` replaced NA with NA — a
  # no-op — and has been removed.)
  as_tsibble(index = Month) %>%
  fill_gaps() %>%
  mutate(y = coalesce(y, 0)) %>%
  as_tibble() %>%
  fill(ccs_mean, .direction = "downup") %>%  # forward/back fill if needed
  mutate(ccs_mean = coalesce(ccs_mean, mean(ccs_mean, na.rm = TRUE)))

# Convert to ts objects (freq = 12).
start_y <- year(as_date(min(monthly_x$Month)))
start_m <- month(as_date(min(monthly_x$Month)))
y_ts <- ts(monthly_x$y, start = c(start_y, start_m), frequency = 12)
x_ts <- ts(monthly_x$ccs_mean, start = c(start_y, start_m), frequency = 12)

# Calendar split: train = 2021–2023, test = 2024.
ti <- time(y_ts)
freq <- frequency(y_ts)
idx_train <- which(floor(ti) < 2024)
idx_test <- which(floor(ti) == 2024)
y_train <- ts(y_ts[idx_train], start = start(y_ts), frequency = freq)
x_train <- ts(x_ts[idx_train], start = start(x_ts), frequency = freq)
y_test <- ts(y_ts[idx_test], start = c(2024, 1), frequency = freq)
x_test <- ts(x_ts[idx_test], start = c(2024, 1), frequency = freq)

# Dynamic regression: ARIMA errors with the raw cargo-condition regressor.
fit_x <- auto.arima(y_train, xreg = x_train, stepwise = FALSE, approximation = FALSE)
checkresiduals(fit_x)
Ljung-Box test
data: Residuals from Regression with ARIMA(0,0,0) errors
Q* = 7.636, df = 7, p-value = 0.3658
Model df: 0. Total lags used: 7
Still ARIMA(0, 0, 0), so we can conclude that at this monthly aggregation, there’s no exploitable temporal structure and the raw cargo condition doesn’t add predictive power (as used).
In other words, at the monthly level, cargo condition has no statistically significant linear effect on delay-hours.